#################################################################################
##############      Lab 10: Nonlinear forecasting     ############################
##############        TF + Feature Engineering      #############################
##############     ----------- solution ---------    ############################
#################################################################################
library(fpp2)
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
## ── Attaching packages ────────────────────────────────────────────── fpp2 2.5 ──
## ✔ ggplot2   3.5.1      ✔ fma       2.5   
## ✔ forecast  8.23.0     ✔ expsmooth 2.3
## 
library(lmtest)
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(tseries) #contains adf.test function
library(TSA)
## Registered S3 methods overwritten by 'TSA':
##   method       from    
##   fitted.Arima forecast
##   plot.Arima   forecast
## 
## Attaching package: 'TSA'
## The following objects are masked from 'package:stats':
## 
##     acf, arima
## The following object is masked from 'package:utils':
## 
##     tar
library(readxl)
library(MLTools)
##    __    __   _            _________
##   |  \  /  | | |          |___   ___| _____   _____   _      _____
##   |   \/   | | |     ____     | |    |  _  | |  _  | | |    |  ___|
##   | |\  /| | | |    |____|    | |    | | | | | | | | | |    |___  |
##   | | \/ | | | |____          | |    | |_| | | |_| | | |__   ___| |
##   |_|    |_| |______|         |_|    |_____| |_____| |____| |_____|
## 
##            Learning is fun, Machine Learning is funnier
##             -----------------------------------------
##            With great power comes great responsibility
##             -----------------------------------------
## Created by: Jos<e9> Portela Gonz<e1>lez <Jose.Portela@iit.comillas.edu>
## Guillermo Mestre Marcos <Guillermo.Mestre@comillas.edu> Jaime Pizarroso Gonzalo
## <jpizarroso@comillas.edu> Antonio Mu<f1>oz San Roque
## <antonio.munoz@iit.comillas.edu>
## 
## Escuela T<e9>cnica Superior de Ingenier<ed>a ICAI
library(imputeTS)
## 
## Attaching package: 'imputeTS'
## 
## The following object is masked from 'package:tseries':
## 
##     na.remove
## 
## The following object is masked from 'package:zoo':
## 
##     na.locf
## Set working directory ---------------------------------------------------------------------------------------------
try(setwd(dirname(rstudioapi::getActiveDocumentContext()$path)))

# Lectura datos -------------------------------------------------------------------------------------------------
fdata <- read_excel("DAILY_DEMAND_TR_NA.xlsx")

#Convert to time series object
fdata_ts <- ts(fdata, frequency=7)
autoplot(fdata_ts, facets = TRUE)

# Imputation of missing values
ggplot_na_distribution(fdata_ts[,4])

imp_seas <- na_seasplit(fdata_ts)
ggplot_na_imputations(fdata_ts[,4], imp_seas[,4])

imp_kalman <- na_kalman(fdata_ts, model = "auto.arima")
ggplot_na_imputations(fdata_ts[,4], imp_kalman[,4])

fdata_ts <- imp_seas

#Create time series 
y <- fdata_ts[,2]
x <- fdata_ts[,c(3,4)]
autoplot(cbind(y,x), facets = TRUE)

#Scale
y.tr <- y/100
x.tr <- x/1
autoplot(cbind(y.tr,x.tr), facets = TRUE)

## Identification and fitting process -------------------------------------------------------------------------------------------------------
#(The different steps followed are represented for teaching purposes)

## FIRST --------------------------------------------
#### Fit initial FT model with large s
#This arima function belongs to the TSA package
TF.fit <- arima(y.tr,
                xtransf = x.tr,
                order=c(1,0,0),
                seasonal = list(order=c(1,0,0),period=7),
                transfer = list(c(0,9),c(0,9)),
                method="ML")
#Check regression error to see the need of differentiation
TF.RegressionError.plot(y.tr,x.tr,TF.fit,lag.max = 50)
## Warning: Removed 9 rows containing missing values or values outside the scale range
## (`geom_point()`).

#Regular differentiation is needed and coefficients of explanatory variables cannot be interpreted


## SECOND --------------------------------------------
#### Fit initial FT model with large s
TF.fit <- arima(y.tr,
                xtransf = x.tr,
                order=c(1,1,0),
                seasonal = list(order=c(1,0,0),period=7),
                transfer = list(c(0,9),c(0,9)),
                method="ML")
## Warning in log(s2): NaNs produced
summary(TF.fit) #summary of training errors and estimated coefficients
## 
## Call:
## arima(x = y.tr, order = c(1, 1, 0), seasonal = list(order = c(1, 0, 0), period = 7), 
##     method = "ML", xtransf = x.tr, transfer = list(c(0, 9), c(0, 9)))
## 
## Coefficients:
##          ar1    sar1  WD-MA0  WD-MA1  WD-MA2   WD-MA3   WD-MA4   WD-MA5
##       0.0202  0.1081  7.7084  0.1133  0.0610  -0.0875  -0.0755  -0.0376
## s.e.  0.0227  0.0227  0.0857  0.0931  0.1052   0.1078   0.1098   0.1099
##        WD-MA6  WD-MA7   WD-MA8   WD-MA9  TEMP-MA0  TEMP-MA1  TEMP-MA2  TEMP-MA3
##       -0.0393  0.0273  -0.1601  -0.0428   -0.0080   -0.0091   -0.0028   -0.0051
## s.e.   0.1080  0.1052   0.0930   0.0853    0.0021    0.0021    0.0021    0.0021
##       TEMP-MA4  TEMP-MA5  TEMP-MA6  TEMP-MA7  TEMP-MA8  TEMP-MA9
##        -0.0027   -0.0023   -0.0052    0.0005   -0.0019    0.0048
## s.e.    0.0021    0.0021    0.0021    0.0021    0.0021    0.0021
## 
## sigma^2 estimated as 0.01934:  log likelihood = 1090.82,  aic = -2137.65
## 
## Training set error measures:
##                         ME      RMSE       MAE         MPE     MAPE      MASE
## Training set -0.0001495119 0.1390487 0.1017627 -0.01655897 1.462634 0.2114647
##                     ACF1
## Training set 0.002126683
coeftest(TF.fit) #statistical significance of estimated coefficients
## 
## z test of coefficients:
## 
##             Estimate  Std. Error z value  Pr(>|z|)    
## ar1       0.02021036  0.02272285  0.8894 0.3737725    
## sar1      0.10810555  0.02267399  4.7678 1.862e-06 ***
## WD-MA0    7.70838271  0.08571117 89.9344 < 2.2e-16 ***
## WD-MA1    0.11326613  0.09308661  1.2168 0.2236871    
## WD-MA2    0.06102610  0.10518577  0.5802 0.5617970    
## WD-MA3   -0.08754697  0.10784644 -0.8118 0.4169212    
## WD-MA4   -0.07551842  0.10983013 -0.6876 0.4917092    
## WD-MA5   -0.03758325  0.10986361 -0.3421 0.7322831    
## WD-MA6   -0.03929341  0.10795906 -0.3640 0.7158835    
## WD-MA7    0.02728403  0.10520079  0.2594 0.7953637    
## WD-MA8   -0.16007692  0.09302759 -1.7207 0.0852968 .  
## WD-MA9   -0.04281963  0.08531323 -0.5019 0.6157303    
## TEMP-MA0 -0.00800931  0.00212406 -3.7707 0.0001628 ***
## TEMP-MA1 -0.00913615  0.00212678 -4.2958 1.741e-05 ***
## TEMP-MA2 -0.00277048  0.00213453 -1.2979 0.1943095    
## TEMP-MA3 -0.00508456  0.00213635 -2.3800 0.0173115 *  
## TEMP-MA4 -0.00274090  0.00214421 -1.2783 0.2011492    
## TEMP-MA5 -0.00230439  0.00214644 -1.0736 0.2830088    
## TEMP-MA6 -0.00524246  0.00213781 -2.4523 0.0141964 *  
## TEMP-MA7  0.00046739  0.00213598  0.2188 0.8267922    
## TEMP-MA8 -0.00188518  0.00212468 -0.8873 0.3749309    
## TEMP-MA9  0.00480513  0.00212146  2.2650 0.0235116 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Check regression error to see the need of differentiation
TF.RegressionError.plot(y.tr,x.tr,TF.fit,lag.max = 50)
## Warning: Removed 9 rows containing missing values or values outside the scale range
## (`geom_point()`).

#Regression error is stationary, then, we can continue
#NOTE: If this regression error was not stationary in variance,boxcox should be applied to input and output series.
# Try AR(1)7 for seasonal component and ARMA(2,1) for regular component?
#Check numerator coefficients of explanatory variable
TF.Identification.plot(x.tr,TF.fit)
## [1] "WD"
##           Estimate Std. Error    z value   Pr(>|z|)
## WD-MA0  7.70838271 0.08571117 89.9344037 0.00000000
## WD-MA1  0.11326613 0.09308661  1.2167822 0.22368710
## WD-MA2  0.06102610 0.10518577  0.5801745 0.56179695
## WD-MA3 -0.08754697 0.10784644 -0.8117743 0.41692118
## WD-MA4 -0.07551842 0.10983013 -0.6875929 0.49170919
## WD-MA5 -0.03758325 0.10986361 -0.3420901 0.73228310
## WD-MA6 -0.03929341 0.10795906 -0.3639658 0.71588352
## WD-MA7  0.02728403 0.10520079  0.2593519 0.79536371
## WD-MA8 -0.16007692 0.09302759 -1.7207467 0.08529681
## WD-MA9 -0.04281963 0.08531323 -0.5019108 0.61573030
## [1] "TEMP"
##               Estimate  Std. Error    z value     Pr(>|z|)
## TEMP-MA0 -0.0080093112 0.002124064 -3.7707484 0.0001627587
## TEMP-MA1 -0.0091361462 0.002126778 -4.2957688 0.0000174089
## TEMP-MA2 -0.0027704842 0.002134532 -1.2979354 0.1943095499
## TEMP-MA3 -0.0050845641 0.002136349 -2.3800248 0.0173114730
## TEMP-MA4 -0.0027409037 0.002144205 -1.2782842 0.2011492341
## TEMP-MA5 -0.0023043904 0.002146445 -1.0735848 0.2830088091
## TEMP-MA6 -0.0052424635 0.002137813 -2.4522552 0.0141963920
## TEMP-MA7  0.0004673889 0.002135975  0.2188176 0.8267921694
## TEMP-MA8 -0.0018851765 0.002124682 -0.8872748 0.3749309447
## TEMP-MA9  0.0048051329 0.002121456  2.2650170 0.0235116353

#WD:    b=0, r=0, s=0
#TEMP:  b=0, r=0, s=1

## THIRD --------------------------------------------
#### Fit model with selected values and estimate initial ARIMA structure
TF.fit <- arima(y.tr,
                xtransf = x.tr,
                order=c(2,1,1),
                seasonal = list(order=c(1,0,0),period=7),
                transfer = list(c(0,0),c(0,1)),
                method="ML")
#Check regression error to identify correlation structure
summary(TF.fit) #summary of training errors and estimated coefficients
## 
## Call:
## arima(x = y.tr, order = c(2, 1, 1), seasonal = list(order = c(1, 0, 0), period = 7), 
##     method = "ML", xtransf = x.tr, transfer = list(c(0, 0), c(0, 1)))
## 
## Coefficients:
##          ar1      ar2      ma1    sar1  WD-MA0  TEMP-MA0  TEMP-MA1
##       0.9054  -0.0978  -0.9093  0.1313  7.7388   -0.0077   -0.0087
## s.e.  0.0304   0.0235   0.0211  0.0235  0.0444    0.0021    0.0021
## 
## sigma^2 estimated as 0.01908:  log likelihood = 1108.9,  aic = -2203.8
## 
## Training set error measures:
##                         ME      RMSE       MAE         MPE     MAPE      MASE
## Training set -0.0003025821 0.1380877 0.1009769 -0.03250181 1.446983 0.2098319
##                     ACF1
## Training set 0.002079806
coeftest(TF.fit) #statistical significance of estimated coefficients
## 
## z test of coefficients:
## 
##            Estimate Std. Error  z value  Pr(>|z|)    
## ar1       0.9053787  0.0304439  29.7393 < 2.2e-16 ***
## ar2      -0.0977585  0.0235450  -4.1520 3.296e-05 ***
## ma1      -0.9093086  0.0211119 -43.0709 < 2.2e-16 ***
## sar1      0.1312836  0.0234984   5.5869 2.311e-08 ***
## WD-MA0    7.7388244  0.0443777 174.3855 < 2.2e-16 ***
## TEMP-MA0 -0.0077486  0.0020891  -3.7091  0.000208 ***
## TEMP-MA1 -0.0087230  0.0020819  -4.1900 2.790e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Check residuals
CheckResiduals.ICAI(TF.fit, lag=50)
## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_bin()`).

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(2,1,1)(1,0,0)[7]
## Q* = 65.171, df = 43, p-value = 0.01615
## 
## Model df: 7.   Total lags used: 50
PlotModelDiagnosis(x.tr, y.tr, fitted(TF.fit), together = TRUE)
## [1] "Residuals vs  WD"
## [1] "Residuals vs  TEMP"
## Don't know how to automatically pick scale for object of type <ts>. Defaulting
## to continuous.
## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_bin()`).
## Don't know how to automatically pick scale for object of type <ts>. Defaulting
## to continuous.
## Don't know how to automatically pick scale for object of type <ts>. Defaulting
## to continuous.
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
## Don't know how to automatically pick scale for object of type <ts>. Defaulting
## to continuous.
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 1 row containing non-finite outside the scale range (`stat_smooth()`).
## Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
## Don't know how to automatically pick scale for object of type <ts>. Defaulting
## to continuous.
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 1 row containing non-finite outside the scale range (`stat_smooth()`).
## Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).

###############################################################################
# Generate new temperature variables
###############################################################################
#Plot relation between temperature and demand
ggplot(fdata)+geom_point(aes(x=TEMP, y=DEM))
## Warning: Removed 23 rows containing missing values or values outside the scale range
## (`geom_point()`).

#We can see that there is a non-linear relation between them.
#Create new time series splitting the temperatures
fdata$tcold <- sapply(fdata$TEMP,min,17)
fdata$thot <- sapply(fdata$TEMP,max,22)

fdata_ts <- ts(fdata, frequency=7)
imp_seas <- na_seasplit(fdata_ts)
fdata_ts <- imp_seas


#Create time series 
y <- fdata_ts[,2]
x <- fdata_ts[,c(3,5,6)]
autoplot(cbind(y,x), facets = TRUE)

#Scale
y.tr <- y/100
x.tr <- x/1
autoplot(cbind(y.tr,x.tr), facets = TRUE)

## Identification and fitting process -------------------------------------------------------------------------------------------------------
#(The different steps followed are represented for teaching purposes)

## FIRST --------------------------------------------
#### Fit initial FT model with large s
#This arima function belongs to the TSA package
TF.fit <- arima(y.tr,
                xtransf = x.tr,
                order=c(1,0,0),
                seasonal = list(order=c(1,0,0),period=7),
                transfer = list(c(0,9),c(0,9),c(0,9)),
                method="ML")
## Warning in arima(y.tr, xtransf = x.tr, order = c(1, 0, 0), seasonal =
## list(order = c(1, : possible convergence problem: optim gave code=1
#Check regression error to see the need of differentiation
TF.RegressionError.plot(y.tr,x.tr,TF.fit,lag.max = 50)
## Warning: Removed 9 rows containing missing values or values outside the scale range
## (`geom_point()`).

#Regular differentiation is needed and coefficients of explanatory variables cannot be interpreted

## SECOND --------------------------------------------
#### Fit initial FT model with large s
TF.fit <- arima(y.tr,
                xtransf = x.tr,
                order=c(1,1,0),
                seasonal = list(order=c(1,0,0),period=7),
                transfer = list(c(0,9),c(0,9),c(0,9)),
                method="ML")
summary(TF.fit) #summary of training errors and estimated coefficients
## 
## Call:
## arima(x = y.tr, order = c(1, 1, 0), seasonal = list(order = c(1, 0, 0), period = 7), 
##     method = "ML", xtransf = x.tr, transfer = list(c(0, 9), c(0, 9), c(0, 9)))
## 
## Coefficients:
##           ar1    sar1  WD-MA0  WD-MA1  WD-MA2   WD-MA3   WD-MA4   WD-MA5
##       -0.1456  0.1889  7.6510  0.0654  0.0365  -0.1013  -0.0859  -0.0694
## s.e.   0.0225  0.0225  0.0726  0.0748  0.0840   0.0837   0.0856   0.0856
##        WD-MA6  WD-MA7   WD-MA8   WD-MA9  tcold-MA0  tcold-MA1  tcold-MA2
##       -0.0785  0.0604  -0.1382  -0.0301    -0.0359    -0.0262    -0.0115
## s.e.   0.0838  0.0840   0.0748   0.0723     0.0022     0.0022     0.0023
##       tcold-MA3  tcold-MA4  tcold-MA5  tcold-MA6  tcold-MA7  tcold-MA8
##         -0.0107    -0.0050    -0.0064    -0.0089     0.0023    -0.0023
## s.e.     0.0022     0.0022     0.0022     0.0022     0.0023     0.0022
##       tcold-MA9  thot-MA0  thot-MA1  thot-MA2  thot-MA3  thot-MA4  thot-MA5
##          0.0010    0.0737    0.0280    0.0120    0.0067    0.0066    0.0105
## s.e.     0.0023    0.0048    0.0048    0.0049    0.0048    0.0048    0.0048
##       thot-MA6  thot-MA7  thot-MA8  thot-MA9
##        -0.0028   -0.0034   -0.0004    0.0054
## s.e.    0.0048    0.0048    0.0048    0.0047
## 
## sigma^2 estimated as 0.01459:  log likelihood = 1368.46,  aic = -2672.93
## 
## Training set error measures:
##                         ME      RMSE        MAE         MPE     MAPE      MASE
## Training set -0.0002706268 0.1207643 0.08918052 -0.01419992 1.294395 0.1853187
##                     ACF1
## Training set -0.02673016
coeftest(TF.fit) #statistical significance of estimated coefficients
## 
## z test of coefficients:
## 
##             Estimate Std. Error  z value  Pr(>|z|)    
## ar1       -0.1456251  0.0225377  -6.4614 1.037e-10 ***
## sar1       0.1888777  0.0225230   8.3860 < 2.2e-16 ***
## WD-MA0     7.6509799  0.0726264 105.3471 < 2.2e-16 ***
## WD-MA1     0.0653920  0.0748163   0.8740  0.382100    
## WD-MA2     0.0365011  0.0839715   0.4347  0.663791    
## WD-MA3    -0.1012625  0.0836678  -1.2103  0.226167    
## WD-MA4    -0.0858998  0.0856087  -1.0034  0.315668    
## WD-MA5    -0.0694331  0.0856499  -0.8107  0.417560    
## WD-MA6    -0.0785434  0.0837879  -0.9374  0.348549    
## WD-MA7     0.0604056  0.0839908   0.7192  0.472022    
## WD-MA8    -0.1381522  0.0748328  -1.8461  0.064871 .  
## WD-MA9    -0.0301445  0.0723086  -0.4169  0.676761    
## tcold-MA0 -0.0359476  0.0022481 -15.9905 < 2.2e-16 ***
## tcold-MA1 -0.0262455  0.0022365 -11.7349 < 2.2e-16 ***
## tcold-MA2 -0.0115337  0.0022566  -5.1110 3.205e-07 ***
## tcold-MA3 -0.0106870  0.0022327  -4.7866 1.696e-06 ***
## tcold-MA4 -0.0049865  0.0022376  -2.2285  0.025847 *  
## tcold-MA5 -0.0063843  0.0022391  -2.8513  0.004354 ** 
## tcold-MA6 -0.0088805  0.0022370  -3.9698 7.194e-05 ***
## tcold-MA7  0.0023018  0.0022646   1.0164  0.309420    
## tcold-MA8 -0.0023003  0.0022419  -1.0261  0.304867    
## tcold-MA9  0.0010335  0.0022550   0.4583  0.646729    
## thot-MA0   0.0737167  0.0048458  15.2126 < 2.2e-16 ***
## thot-MA1   0.0279656  0.0048468   5.7698 7.934e-09 ***
## thot-MA2   0.0120275  0.0048601   2.4747  0.013334 *  
## thot-MA3   0.0067006  0.0047768   1.4027  0.160696    
## thot-MA4   0.0065706  0.0048185   1.3636  0.172686    
## thot-MA5   0.0105192  0.0048113   2.1864  0.028790 *  
## thot-MA6  -0.0027618  0.0047602  -0.5802  0.561792    
## thot-MA7  -0.0034115  0.0048115  -0.7090  0.478304    
## thot-MA8  -0.0003691  0.0047674  -0.0774  0.938289    
## thot-MA9   0.0053618  0.0047445   1.1301  0.258425    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Check regression error to see the need of differentiation
TF.RegressionError.plot(y.tr,x.tr,TF.fit,lag.max = 50)
## Warning: Removed 9 rows containing missing values or values outside the scale range
## (`geom_point()`).

#Regression error is stationary, then, we can continue
#NOTE: If this regression error was not stationary in variance,boxcox should be applied to input and output series.
# Try AR(1)7 for seasonal component and AR(2) for regular component?
#Check numerator coefficients of explanatory variable
TF.Identification.plot(x.tr,TF.fit)
## [1] "WD"
##           Estimate Std. Error     z value   Pr(>|z|)
## WD-MA0  7.65097995 0.07262636 105.3471462 0.00000000
## WD-MA1  0.06539198 0.07481626   0.8740344 0.38209955
## WD-MA2  0.03650111 0.08397145   0.4346847 0.66379130
## WD-MA3 -0.10126253 0.08366783  -1.2102922 0.22616677
## WD-MA4 -0.08589976 0.08560871  -1.0033998 0.31566801
## WD-MA5 -0.06943315 0.08564990  -0.8106623 0.41755960
## WD-MA6 -0.07854338 0.08378790  -0.9374073 0.34854910
## WD-MA7  0.06040560 0.08399081   0.7191930 0.47202200
## WD-MA8 -0.13815224 0.07483278  -1.8461461 0.06487099
## WD-MA9 -0.03014448 0.07230859  -0.4168866 0.67676135
## [1] "tcold"
##               Estimate  Std. Error     z value     Pr(>|z|)
## tcold-MA0 -0.035947568 0.002248058 -15.9905018 1.488274e-57
## tcold-MA1 -0.026245487 0.002236526 -11.7349368 8.438930e-32
## tcold-MA2 -0.011533690 0.002256645  -5.1109894 3.204759e-07
## tcold-MA3 -0.010687035 0.002232705  -4.7865854 1.696428e-06
## tcold-MA4 -0.004986461 0.002237583  -2.2285031 2.584698e-02
## tcold-MA5 -0.006384309 0.002239058  -2.8513377 4.353570e-03
## tcold-MA6 -0.008880489 0.002237033  -3.9697628 7.194420e-05
## tcold-MA7  0.002301777 0.002264546   1.0164408 3.094195e-01
## tcold-MA8 -0.002300317 0.002241911  -1.0260519 3.048671e-01
## tcold-MA9  0.001033506 0.002255029   0.4583114 6.467287e-01
## [1] "thot"
##               Estimate  Std. Error     z value     Pr(>|z|)
## thot-MA0  0.0737167194 0.004845770 15.21259006 2.917674e-52
## thot-MA1  0.0279656091 0.004846852  5.76984956 7.934233e-09
## thot-MA2  0.0120274631 0.004860126  2.47472243 1.333398e-02
## thot-MA3  0.0067005788 0.004776800  1.40273372 1.606963e-01
## thot-MA4  0.0065705912 0.004818478  1.36362370 1.726860e-01
## thot-MA5  0.0105192065 0.004811301  2.18635385 2.878973e-02
## thot-MA6 -0.0027618167 0.004760255 -0.58018253 5.617915e-01
## thot-MA7 -0.0034115020 0.004811487 -0.70903283 4.783041e-01
## thot-MA8 -0.0003690972 0.004767444 -0.07742036 9.382891e-01
## thot-MA9  0.0053618422 0.004744481  1.13012197 2.584248e-01

#WD:    b=0, r=0, s=0
#tcold: b=0, r=1, s=0
#thot:  b=0, r=1, s=0


## THIRD --------------------------------------------
#### Fit model with selected values and estimate initial ARIMA structure
TF.fit <- arima(y.tr,
                xtransf = x.tr,
                order=c(2,1,0),
                seasonal = list(order=c(1,0,0),period=7),
                transfer = list(c(0,0),c(1,0),c(1,0)),
                method="ML")
## Warning in log(s2): NaNs produced
## Warning in log(s2): NaNs produced
## Warning in log(s2): NaNs produced
## Warning in log(s2): NaNs produced
## Warning in arima(y.tr, xtransf = x.tr, order = c(2, 1, 0), seasonal =
## list(order = c(1, : possible convergence problem: optim gave code=1
#Check regression error to identify correlation structure
summary(TF.fit) #summary of training errors and estimated coefficients
## 
## Call:
## arima(x = y.tr, order = c(2, 1, 0), seasonal = list(order = c(1, 0, 0), period = 7), 
##     method = "ML", xtransf = x.tr, transfer = list(c(0, 0), c(1, 0), c(1, 0)))
## 
## Coefficients:
##           ar1      ar2    sar1  WD-MA0  tcold-AR1  tcold-MA0  thot-AR1
##       -0.1639  -0.1776  0.1586  7.8178     0.4405    -0.0374    0.3693
## s.e.   0.0237   0.0235  0.0229  0.0398     0.0675     0.0019    0.0416
##       thot-MA0
##         0.0718
## s.e.    0.0044
## 
## sigma^2 estimated as 0.01485:  log likelihood = 1357.61,  aic = -2699.21
## 
## Training set error measures:
##                        ME      RMSE        MAE         MPE     MAPE      MASE
## Training set -0.000344201 0.1218135 0.08987042 -0.02001728 1.302992 0.1867523
##                      ACF1
## Training set -0.009679965
coeftest(TF.fit) #statistical significance of estimated coefficients
## 
## z test of coefficients:
## 
##             Estimate Std. Error  z value  Pr(>|z|)    
## ar1       -0.1639247  0.0237467  -6.9031 5.089e-12 ***
## ar2       -0.1775620  0.0235003  -7.5557 4.165e-14 ***
## sar1       0.1585748  0.0229236   6.9175 4.596e-12 ***
## WD-MA0     7.8178409  0.0397517 196.6667 < 2.2e-16 ***
## tcold-AR1  0.4405121  0.0675286   6.5233 6.876e-11 ***
## tcold-MA0 -0.0373812  0.0019255 -19.4141 < 2.2e-16 ***
## thot-AR1   0.3693012  0.0416196   8.8732 < 2.2e-16 ***
## thot-MA0   0.0717739  0.0043749  16.4060 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Check residuals
CheckResiduals.ICAI(TF.fit, lag=50)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(2,1,0)(1,0,0)[7]
## Q* = 112.6, df = 42, p-value = 2.283e-08
## 
## Model df: 8.   Total lags used: 50
## FOURTH --------------------------------------------
#### lets try MA/(2) instead of AR(2)
#### start with (0,1,2)(1,0,0)7
TF.fit <- arima(y.tr,
                   xtransf = x.tr,
                   order=c(0,1,2),
                   seasonal = list(order=c(1,0,0),period=7),
                   transfer = list(c(0,0),c(1,0),c(1,0)),
                   method="ML")
## Warning in arima(y.tr, xtransf = x.tr, order = c(0, 1, 2), seasonal =
## list(order = c(1, : possible convergence problem: optim gave code=1
summary(TF.fit) #summary of training errors and estimated coefficients
## 
## Call:
## arima(x = y.tr, order = c(0, 1, 2), seasonal = list(order = c(1, 0, 0), period = 7), 
##     method = "ML", xtransf = x.tr, transfer = list(c(0, 0), c(1, 0), c(1, 0)))
## 
## Coefficients:
##           ma1      ma2    sar1  WD-MA0  tcold-AR1  tcold-MA0  thot-AR1
##       -0.2348  -0.2400  0.1586  7.7629     0.6552    -0.0382    0.4523
## s.e.   0.0223   0.0239  0.0225  0.0411     0.0208     0.0017    0.0339
##       thot-MA0
##         0.0698
## s.e.    0.0041
## 
## sigma^2 estimated as 0.0141:  log likelihood = 1408.26,  aic = -2800.51
## 
## Training set error measures:
##                         ME      RMSE      MAE         MPE     MAPE      MASE
## Training set -0.0004318267 0.1187317 0.087793 -0.02405313 1.272522 0.1824354
##                    ACF1
## Training set 0.01845859
coeftest(TF.fit) #statistical significance of estimated coefficients
## 
## z test of coefficients:
## 
##             Estimate Std. Error  z value  Pr(>|z|)    
## ma1       -0.2347890  0.0223162 -10.5210 < 2.2e-16 ***
## ma2       -0.2399553  0.0239227 -10.0305 < 2.2e-16 ***
## sar1       0.1585683  0.0224745   7.0555  1.72e-12 ***
## WD-MA0     7.7628533  0.0410811 188.9641 < 2.2e-16 ***
## tcold-AR1  0.6551798  0.0207928  31.5099 < 2.2e-16 ***
## tcold-MA0 -0.0382484  0.0016875 -22.6651 < 2.2e-16 ***
## thot-AR1   0.4523263  0.0339400  13.3272 < 2.2e-16 ***
## thot-MA0   0.0697618  0.0041500  16.8101 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Check residuals
CheckResiduals.ICAI(TF.fit, lag=50)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(0,1,2)(1,0,0)[7]
## Q* = 92.444, df = 42, p-value = 1.18e-05
## 
## Model df: 8.   Total lags used: 50
#If residuals are not white noise, change order of ARMA
#Remaning correlations in lags 4 and 5 and 14. Increase orders


## FIFTH --------------------------------------------
#### Try (0,1,5)(2,0,0)7
TF.fit <- arima(y.tr,
                xtransf = x.tr,
                order=c(0,1,5),
                seasonal = list(order=c(2,0,0),period=7),
                transfer = list(c(0,0),c(1,0),c(1,0)),
                method="ML")
## Warning in arima(y.tr, xtransf = x.tr, order = c(0, 1, 5), seasonal =
## list(order = c(2, : possible convergence problem: optim gave code=1
summary(TF.fit) #summary of training errors and estimated coefficients
## 
## Call:
## arima(x = y.tr, order = c(0, 1, 5), seasonal = list(order = c(2, 0, 0), period = 7), 
##     method = "ML", xtransf = x.tr, transfer = list(c(0, 0), c(1, 0), c(1, 0)))
## 
## Coefficients:
##           ma1      ma2      ma3      ma4      ma5    sar1    sar2  WD-MA0
##       -0.2247  -0.1993  -0.0568  -0.0816  -0.0576  0.1755  0.0458  7.7432
## s.e.   0.0228   0.0234   0.0228   0.0243   0.0230  0.0232  0.0228  0.0442
##       tcold-AR1  tcold-MA0  thot-AR1  thot-MA0
##          0.6573    -0.0390    0.4649    0.0687
## s.e.     0.0208     0.0018    0.0347    0.0043
## 
## sigma^2 estimated as 0.01385:  log likelihood = 1426.54,  aic = -2829.09
## 
## Training set error measures:
##                         ME     RMSE        MAE         MPE     MAPE      MASE
## Training set -0.0006101027 0.117636 0.08683689 -0.02893619 1.255731 0.1804486
##                       ACF1
## Training set -0.0001093044
coeftest(TF.fit) #statistical significance of estimated coefficients
## 
## z test of coefficients:
## 
##             Estimate Std. Error  z value  Pr(>|z|)    
## ma1       -0.2246741  0.0228241  -9.8437 < 2.2e-16 ***
## ma2       -0.1992616  0.0233867  -8.5203 < 2.2e-16 ***
## ma3       -0.0567997  0.0227573  -2.4959 0.0125644 *  
## ma4       -0.0815808  0.0243499  -3.3504 0.0008071 ***
## ma5       -0.0576490  0.0229939  -2.5071 0.0121710 *  
## sar1       0.1755026  0.0232275   7.5558 4.162e-14 ***
## sar2       0.0458038  0.0227501   2.0133 0.0440787 *  
## WD-MA0     7.7432120  0.0441636 175.3301 < 2.2e-16 ***
## tcold-AR1  0.6572825  0.0208206  31.5689 < 2.2e-16 ***
## tcold-MA0 -0.0389775  0.0017708 -22.0117 < 2.2e-16 ***
## thot-AR1   0.4649279  0.0347058  13.3963 < 2.2e-16 ***
## thot-MA0   0.0687455  0.0042881  16.0318 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Check residuals
CheckResiduals.ICAI(TF.fit, lag=50)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(0,1,5)(2,0,0)[7]
## Q* = 57.782, df = 38, p-value = 0.0208
## 
## Model df: 12.   Total lags used: 50
#If residuals are not white noise, change order of ARMA
#Correlations are modeled, But many MA terms not significant. Try AR alternative


## SIXTH --------------------------------------------
#### Try (5,1,0)(2,0,0)7
TF.fit <- arima(y.tr,
                   xtransf = x.tr,
                   order=c(5,1,0),
                   seasonal = list(order=c(2,0,0),period=7),
                   transfer = list(c(0,0),c(1,0),c(1,0)),
                   method="ML")
summary(TF.fit) #summary of training errors and estimated coefficients
## 
## Call:
## arima(x = y.tr, order = c(5, 1, 0), seasonal = list(order = c(2, 0, 0), period = 7), 
##     method = "ML", xtransf = x.tr, transfer = list(c(0, 0), c(1, 0), c(1, 0)))
## 
## Coefficients:
##           ar1      ar2      ar3      ar4      ar5    sar1    sar2  WD-MA0
##       -0.2169  -0.2415  -0.1363  -0.1487  -0.1268  0.1207  0.0591  7.7456
## s.e.   0.0225   0.0230   0.0235   0.0230   0.0231  0.0236  0.0226  0.0436
##       tcold-AR1  tcold-MA0  thot-AR1  thot-MA0
##          0.6584    -0.0386    0.4554    0.0697
## s.e.     0.0203     0.0017    0.0347    0.0042
## 
## sigma^2 estimated as 0.0139:  log likelihood = 1422.59,  aic = -2821.17
## 
## Training set error measures:
##                         ME      RMSE        MAE         MPE     MAPE      MASE
## Training set -0.0004034362 0.1178726 0.08688364 -0.02362039 1.258602 0.1805457
##                      ACF1
## Training set -0.003580911
coeftest(TF.fit) #statistical significance of estimated coefficients
## 
## z test of coefficients:
## 
##             Estimate Std. Error  z value  Pr(>|z|)    
## ar1       -0.2168945  0.0225211  -9.6307 < 2.2e-16 ***
## ar2       -0.2414818  0.0230057 -10.4966 < 2.2e-16 ***
## ar3       -0.1363313  0.0235230  -5.7957 6.805e-09 ***
## ar4       -0.1486906  0.0229862  -6.4687 9.885e-11 ***
## ar5       -0.1267777  0.0231496  -5.4765 4.339e-08 ***
## sar1       0.1207081  0.0235919   5.1165 3.113e-07 ***
## sar2       0.0590879  0.0226338   2.6106  0.009038 ** 
## WD-MA0     7.7455967  0.0436296 177.5308 < 2.2e-16 ***
## tcold-AR1  0.6584161  0.0203285  32.3888 < 2.2e-16 ***
## tcold-MA0 -0.0385911  0.0017080 -22.5948 < 2.2e-16 ***
## thot-AR1   0.4554008  0.0346716  13.1347 < 2.2e-16 ***
## thot-MA0   0.0696936  0.0042162  16.5299 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Check residuals
CheckResiduals.ICAI(TF.fit, lag=50)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(5,1,0)(2,0,0)[7]
## Q* = 55.53, df = 38, p-value = 0.03296
## 
## Model df: 12.   Total lags used: 50
#All coefficients significant --> Good.
#All correlations are small   --> Good.

#Check Cross correlation residuals - expl. variables
res <- residuals(TF.fit)
res[is.na(res)] <- 0
ccf(y = res, x = x.tr[,1])

ccf(y = res, x = x.tr[,2])

ccf(y = res, x = x.tr[,3])

########
#X1 seems correlated with residuals -> modify TF

## SEVENTH --------------------------------------------
#### Try (5,1,0)(2,0,0)7 and modify TF for X1
TF.fit <- arima(y.tr,
                   xtransf = x.tr,
                   order=c(5,1,0),
                   seasonal = list(order=c(2,0,0),period=7),
                   transfer = list(c(0,1),c(1,0),c(1,0)),
                   method="ML")
summary(TF.fit) #summary of training errors and estimated coefficients
## 
## Call:
## arima(x = y.tr, order = c(5, 1, 0), seasonal = list(order = c(2, 0, 0), period = 7), 
##     method = "ML", xtransf = x.tr, transfer = list(c(0, 1), c(1, 0), c(1, 0)))
## 
## Coefficients:
##           ar1     ar2      ar3      ar4      ar5    sar1    sar2  WD-MA0
##       -0.2183  -0.244  -0.1364  -0.1478  -0.1266  0.1241  0.0617  7.7495
## s.e.   0.0225   0.023   0.0235   0.0230   0.0232  0.0237  0.0227  0.0438
##       WD-MA1  tcold-AR1  tcold-MA0  thot-AR1  thot-MA0
##       0.0663     0.6578    -0.0384    0.4723    0.0701
## s.e.  0.0437     0.0205     0.0017    0.0343    0.0041
## 
## sigma^2 estimated as 0.01388:  log likelihood = 1423.61,  aic = -2821.22
## 
## Training set error measures:
##                         ME      RMSE        MAE         MPE     MAPE      MASE
## Training set -0.0004049858 0.1177685 0.08693924 -0.02056747 1.259037 0.1806613
##                      ACF1
## Training set -0.003618925
coeftest(TF.fit) #statistical significance of estimated coefficients
## 
## z test of coefficients:
## 
##             Estimate Std. Error  z value  Pr(>|z|)    
## ar1       -0.2183334  0.0224868  -9.7094 < 2.2e-16 ***
## ar2       -0.2440247  0.0230357 -10.5933 < 2.2e-16 ***
## ar3       -0.1363839  0.0235227  -5.7980 6.713e-09 ***
## ar4       -0.1477789  0.0229692  -6.4338 1.245e-10 ***
## ar5       -0.1265691  0.0231653  -5.4637 4.662e-08 ***
## sar1       0.1240664  0.0236593   5.2439 1.572e-07 ***
## sar2       0.0617365  0.0226829   2.7217  0.006494 ** 
## WD-MA0     7.7495250  0.0437929 176.9583 < 2.2e-16 ***
## WD-MA1     0.0662699  0.0437449   1.5149  0.129794    
## tcold-AR1  0.6578258  0.0205477  32.0145 < 2.2e-16 ***
## tcold-MA0 -0.0383677  0.0017128 -22.4003 < 2.2e-16 ***
## thot-AR1   0.4722826  0.0343140  13.7636 < 2.2e-16 ***
## thot-MA0   0.0701416  0.0041342  16.9661 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Check residuals
CheckResiduals.ICAI(TF.fit, lag=50)
## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_bin()`).

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(5,1,0)(2,0,0)[7]
## Q* = 53.891, df = 37, p-value = 0.03589
## 
## Model df: 13.   Total lags used: 50
#All coefficients significant ???
#All correlations are small   --> Good.

#Check Cross correlation residuals - expl. variables
res <- residuals(TF.fit)
res[is.na(res)] <- 0
ccf(y = res, x = x.tr[,1])  # --> much better

ccf(y = res, x = x.tr[,2])

ccf(y = res, x = x.tr[,3])

## Finished model identification

#Check fitted
autoplot(y.tr, series = "Real")+
  forecast::autolayer(fitted(TF.fit), series = "Fitted")
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_line()`).

#Chech training errors of the model
accuracy(fitted(TF.fit),y.tr)
##                     ME      RMSE        MAE         MPE     MAPE         ACF1
## Test set -0.0004049858 0.1177685 0.08693924 -0.02056747 1.259037 -0.003618925
##          Theil's U
## Test set 0.1694156
PlotModelDiagnosis(x.tr, y.tr, fitted(TF.fit), together = TRUE)
## [1] "Residuals vs  WD"
## [1] "Residuals vs  tcold"
## [1] "Residuals vs  thot"
## Don't know how to automatically pick scale for object of type <ts>. Defaulting
## to continuous.
## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_bin()`).
## Don't know how to automatically pick scale for object of type <ts>. Defaulting
## to continuous.
## Don't know how to automatically pick scale for object of type <ts>. Defaulting
## to continuous.
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
## Don't know how to automatically pick scale for object of type <ts>. Defaulting
## to continuous.
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 1 row containing non-finite outside the scale range (`stat_smooth()`).
## Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
## Don't know how to automatically pick scale for object of type <ts>. Defaulting
## to continuous.
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 1 row containing non-finite outside the scale range (`stat_smooth()`).
## Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
## Don't know how to automatically pick scale for object of type <ts>. Defaulting
## to continuous.
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 1 row containing non-finite outside the scale range (`stat_smooth()`).
## Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).

#################################################################################
## Forecast for new data with h = 7
#################################################################################

x.tv <- read_excel("DAILY_DEMAND_TV.xlsx")
#divide temperatures for validation
x.tv$tcold <- sapply(x.tv$TEMP,min,17)
x.tv$thot <- sapply(x.tv$TEMP,max,22)

x.tv <- ts(x.tv[,c(2,4,5)])
#Obtain forecast for horizon = 7 using the trained parameters of the model
val.forecast_h7 <- TF.forecast(y.old = y.tr, #past values of the series
                              x.old = x.tr, #Past values of the explanatory variables
                              x.new = x.tv, #New values of the explanatory variables
                              model = TF.fit, #fitted transfer function model
                              h=7) #Forecast horizon

val.forecast_h7 * 100
## [1] 692.7268 643.1141 770.4921 782.3234 778.0349 656.0698 709.4874